# Replication code for Kruks-Wisner 2022, Social Brokerage: Accountability and the Social Life of Information, Comparative Political Studies #

# Note: drawing on dataset "VV_video_database_fall2020.csv" #
# See "Codebook_VVData_Social Brokers_CPS2022_cleaned" # 


library(dplyr)
library(haven)
library(tidyverse)
library(lubridate)
library(stargazer)
library(ggplot2)
library(plyr)
library(tidyverse)

# Point the session at the folder that holds the replication data files.
# NOTE(review): this path is machine-specific; adjust it to wherever the
# data files were saved before running the script.
setwd("~/Downloads")

# Load the video database into `video_database`.
video_database <- readr::read_csv(file = "VV_video_database_fall2020.csv")


#################################################################################################

#1. NOTE ON CALCULATION OF IMPACT RATE #
# Captures ratio of total impacts achieved to total issue videos produced per CC #

# Variable created in "VV_video_database_fall2020," as follows: #

# dplyr verbs are namespace-qualified throughout this section: plyr is attached
# after dplyr at the top of the file, so a bare rename() resolves to
# plyr::rename(), which does not accept the `new_name = old_name` form.

# This section writes back to the same CSV it reads from, so on a re-run the
# file already carries "cc_total_impacts". Drop any earlier copy so the merge
# below keys on cc_id alone and cannot produce duplicated columns.
video_database <- dplyr::select(video_database, -dplyr::any_of("cc_total_impacts"))

#find number of videos per cc where is_impact is true#
video_database_impact <- dplyr::filter(video_database, is_impact == TRUE)

# One row per CC that has at least one impact video. table() names its output
# columns Var1 (the cc_id value) and Freq (the number of impact videos).
n_occur_impact <- data.frame(table(video_database_impact$cc_id))
n_occur_impact <- dplyr::rename(n_occur_impact, cc_id = Var1, cc_total_impacts = Freq)

# Diagnostics: shared column names, number of distinct CCs, number of rows.
intersect(names(video_database), names(n_occur_impact))
data.temp <- dplyr::select(video_database, cc_id)
data.temp <- unique(data.temp)
nrow(data.temp)
nrow(video_database)

# Put the join key on the same type on both sides. table() returns cc_id as a
# factor whose levels cover only CCs with impacts; joining factors with
# different level sets triggers a coercion warning.
video_database <- dplyr::mutate(video_database, cc_id = as.character(cc_id))
n_occur_impact <- dplyr::mutate(n_occur_impact, cc_id = as.character(cc_id))

# check1: rows whose CC has no impact video; check2: counted CCs that are
# missing from the database (expected to be empty, since the counts are
# derived from video_database itself).
check1 <- dplyr::anti_join(video_database, n_occur_impact, by = "cc_id")
check2 <- dplyr::anti_join(n_occur_impact, video_database, by = "cc_id")

#merge to create database with new variable "cc_total_impacts"#
# Every counted CC comes from video_database, so a left join keeps exactly the
# original rows. CCs without any impact video get NA (not 0) here, and hence an
# NA impact rate below.
video_database <- dplyr::left_join(video_database, n_occur_impact, by = "cc_id")


# Creating new variable for impact rate per CC #
video_database <- dplyr::mutate(video_database, cc_impact_rate = cc_total_impacts / cc_total_issues_produced)

#exporting database
# NOTE: this overwrites the input file with the augmented data.
readr::write_csv(video_database, "VV_video_database_fall2020.csv")


##################################################################################################

#2. DATA ANALYSIS ON IMPACT RATE #

# Re-read the database exported at the end of section 1.
video_database <- read_csv("VV_video_database_fall2020.csv")


#A. Average impact #

#for full sample#
summary(video_database$cc_impact_rate)

#for qual study sample#
# At this point cc_qual_study has not been relabelled yet: the plotting section
# further down recodes "1"/"0" into "Research Sample"/"Non-sample". Accept both
# the raw code and the label so the subset is not silently empty.
# NOTE(review): assumes 1 marks the research sample in the raw file - confirm
# against the codebook.
video_database_qual_study <- dplyr::filter(video_database, cc_qual_study %in% c("1", "Research Sample"))
summary(video_database_qual_study$cc_impact_rate)


#B. FIGURE 1. Problem-Solving Impacts#
# Note: plots total number of issue videos produced per CC by total number of impacts achieved per CC #

#cleaning video_database for use in graphics#
# NOTE(review): the three filters below are cumulative, so every figure built
# from video_database_plot only sees Uttar Pradesh/Bihar rows with a recorded
# gender and social-movement flag. The captions further down cite a larger
# video count for the gender figure than for the UP/Bihar figure - confirm this
# restriction is intended for Figure 1 and Figure C.1.
video_database_plot <- dplyr::mutate(video_database, cc_qual_study = fct_recode(as.character(cc_qual_study),
                                                                                "Research Sample" = "1",
                                                                                "Non-sample" = "0"))
video_database_plot <- dplyr::filter(video_database_plot, cc_state %in% c("Uttar Pradesh", "Bihar"))
# NOTE(review): both capitalised and lower-case gender labels are kept as
# separate values; if both spellings occur in the data, the two-colour manual
# scales used in the gender figures will not have enough values.
video_database_plot <- dplyr::filter(video_database_plot, cc_gender %in% c("Female", "Male", "female", "male"))
video_database_plot <- dplyr::filter(video_database_plot, cc_social_movement_involvements %in% c(1, 0))
video_database_plot <- dplyr::mutate(video_database_plot, cc_social_movement_involvements = fct_recode(as.character(cc_social_movement_involvements),
                                                                                                       "Involved" = "1",
                                                                                                       "Not involved" = "0"))
#creating educational categories#
# Collapse the detailed qualification strings into BA / MA / 12th; any level
# not listed here is left unchanged.
video_database_plot <- dplyr::mutate(video_database_plot, cc_educational_qualifications = fct_recode(as.factor(cc_educational_qualifications),
                                                                                                     "BA" = "BCA",
                                                                                                     "BA" = "BCA, Diploma Mass Communication",
                                                                                                     "BA" = "BCom",
                                                                                                     "BA" = "BSc",
                                                                                                     "BA" = "BSW",
                                                                                                     "MA" = "MA History",
                                                                                                     "MA" = "MA Public Administration",
                                                                                                     "MA" = "MA, BEd, ITI, DAMS",
                                                                                                     "MA" = "MCom",
                                                                                                     "MA" = "MPhil",
                                                                                                     "MA" = "MSc",
                                                                                                     "MA" = "MSW",
                                                                                                     "MA" = "MTM",
                                                                                                     "12th" = "12th, BTI"))
#creating villages groups#
# No groupings are defined for this variable, so it is only converted to a factor.
video_database_plot <- dplyr::mutate(video_database_plot, cc_no_of_villages_working_with = as.factor(cc_no_of_villages_working_with))

#creating separate dataset of CCs in the research sample#
video_database_qual_study_plot <- dplyr::filter(video_database_plot, cc_qual_study == "Research Sample")


#creating plot for FIGURE 1.#
# Points plus a linear fit per sample group, distinguished by line type and
# point shape. No colour aesthetic is mapped, so the manual colour scale has no
# visible effect on this figure.
g1 <- ggplot(video_database_plot, aes(x = cc_total_issues_produced, y = cc_total_impacts, linetype = cc_qual_study, pch = cc_qual_study)) +
  geom_point() + geom_smooth(method = "lm") + xlab("Issues Videos") + ylab("Impacts Achieved") +
  scale_color_manual(values = c("#1b98e0", "#353436")) +
  labs(linetype = "Community Correspondents", pch = "Community Correspondents")


#C. ONLINE APPENDIX #

# Figure B.1. Distribution of Impact Rates (Impact rate summary statistics by min, max, 1st, 3rd qu, median, mean)#
summary(video_database$cc_impact_rate)

#for last 12 months#
# Keep rows pitched from 2019-04-01 through 2020-03-31 (both ends inclusive)
# and save the subset. write_csv() hands its input back, so the result of the
# pipeline is the filtered data itself.
video_database_12month <- video_database %>%
  filter(
    story_pitch_date >= "2019-04-01",
    story_pitch_date <= "2020-03-31"
  ) %>%
  write_csv("VV_video_database_fall2020_12month.csv")

summary(video_database_12month$cc_impact_rate)


# BY GENDER#

# Table C.1, impact rates by gender, 12 months#
# Per gender: number of rows, plus mean and standard deviation of
# cc_impact_rate with missing values excluded.
dplyr::summarise(
  group_by(video_database_12month, cc_gender),
  count = n(),
  mean = mean(cc_impact_rate, na.rm = TRUE),
  sd = sd(cc_impact_rate, na.rm = TRUE)
)


#Figure C.1, impact rates by gender, over time (full dataset); all CCs and qual study CCs in one plot

# Plot built from the cleaned plotting data.
g_gender1 <- ggplot(
  data = video_database_plot,
  mapping = aes(
    x = cc_total_issues_produced,
    y = cc_total_impacts,
    colour = cc_gender,
    shape = cc_gender
  )
) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("#1b98e0", "#353436")) +
  labs(
    x = "Issues Videos",
    y = "Impacts Achieved",
    colour = "Gender",
    shape = "Gender",
    caption = "Source: Video Volunteers database of 18,844 videos produced by CCs who have identified their gender. 
       CC impact rates are a ratio of the number of documented impacts they have achieved to the number of issue videos they have produced."
  )


# Same plot restricted to the research-sample subset.
g_gender2 <- ggplot(
  data = video_database_qual_study_plot,
  mapping = aes(
    x = cc_total_issues_produced,
    y = cc_total_impacts,
    colour = cc_gender,
    shape = cc_gender
  )
) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("#1b98e0", "#353436")) +
  labs(
    x = "Issues Videos",
    y = "Impacts Achieved",
    colour = "Gender",
    shape = "Gender",
    caption = "Source: Video Volunteers database of 2,835 videos made by the 19 CCs in the research sample who have identified their gender. 
       CC impact rates are a ratio of the number of documented impacts they have achieved to the number of issue videos they have produced."
  )



# BY SOCIAL CATEGORY#
#Table C.2, Impact Rate by Social Category (12 months)

# Per social category: number of rows, plus mean and standard deviation of
# cc_impact_rate with missing values excluded.
dplyr::summarise(
  group_by(video_database_12month, cc_social_category),
  count = n(),
  mean = mean(cc_impact_rate, na.rm = TRUE),
  sd = sd(cc_impact_rate, na.rm = TRUE)
)


# BY ISSUE TYPE#
#Table C.3, Impact by Issue (12 months)

# Same three statistics, grouped by issue theme.
issue_groups <- dplyr::summarise(
  group_by(video_database_12month, iu_theme),
  count = n(),
  mean = mean(cc_impact_rate, na.rm = TRUE),
  sd = sd(cc_impact_rate, na.rm = TRUE)
)
# Show up to 100 rows instead of the default truncated tibble print.
print(issue_groups, n = 100)


# BY STATE#

# Table C.4, Impact by State (12 months)

# Per state: number of rows, plus mean and standard deviation of
# cc_impact_rate with missing values excluded.
state <- dplyr::summarise(
  group_by(video_database_12month, cc_state),
  count = n(),
  mean = mean(cc_impact_rate, na.rm = TRUE),
  sd = sd(cc_impact_rate, na.rm = TRUE)
)
# Show up to 100 rows instead of the default truncated tibble print.
print(state, n = 100)

# Figure C.2. Impact rates over time, UP and Bihar (full dataset); all CCs and qual study in one plot

# Points plus a linear fit per state, distinguished by colour and point shape.
g_state1 <- ggplot(
  data = video_database_plot,
  mapping = aes(
    x = cc_total_issues_produced,
    y = cc_total_impacts,
    colour = cc_state,
    shape = cc_state
  )
) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("#1b98e0", "#353436")) +
  labs(
    x = "Issues Videos",
    y = "Impacts Achieved",
    colour = "State",
    shape = "State",
    caption = "Source: Video Volunteers database of 7,066 videos produced by CCs living in UP and Bihar. 
       CC impact rates are a ratio of the number of documented impacts they have achieved to the number of issue videos they have produced."
  )


############################################################################################

# 3. DESCRIPTIVE STATISTICS: Demographic and socioeconomic features of the CCs

# Note: drawing on dataset "all_cc_details_anonymous.csv" #


# Table 1. Demographic and socioeconomic features of the CCs
# Each statistic below is reported twice: first for the full network, then for
# the research sample.

cc_details <- read.csv(file = "all_cc_details_anonymous.csv")

# Subset of the 19 research CCs, selected by id.
cc_details_research <- cc_details %>%
  filter(id %in% c(
    9, 12, 18, 19, 221, 225, 231, 233, 235, 242,
    244, 288, 301, 304, 305, 306, 365, 367, 373
  ))


# Gender
table(cc_details$gender)
table(cc_details_research$gender)

# Social category
table(cc_details$social_category)
table(cc_details_research$social_category)

# Religion
table(cc_details$religion)
table(cc_details_research$religion)

# Highest level of education
table(cc_details$educational_qualifications)
table(cc_details_research$educational_qualifications)

# Monthly household income
summary(cc_details$monthly_household_income)
summary(cc_details_research$monthly_household_income)
                          

